# -*- coding: utf-8 -*-
"""
Auxiliary functions for cyclic_analysis.py

Created on Wed Apr 28 09:23:30 2021

@author: s168562
"""


def read_data(filename, filepath, headerline, outpath, nr_cycles, max_strain, 
              min_strain, no_outpath = False):
    ''' Read a measurement file, prepare the output folder and extract the
    header information from the input file.

    Parameters
    ----------
    filename : name of the input file (with extension).
    filepath : folder that contains ``filename``.
    headerline : passed to ``pandas.read_csv`` as ``skiprows``; everything
        after the skipped part must be whitespace separated data with nine 
        columns.
    outpath : parent folder for the output folder.
    nr_cycles, max_strain, min_strain : not used by this function; kept so
        existing callers keep working.
    no_outpath : if True, the output folder is not created (its path is still
        returned).

    Returns
    -------
    (straining_df, Rini, sampleID, mat, length, width, linewidth, outpathfull)
    where ``straining_df`` holds the data columns and the remaining values are
    read from the ``key=value`` lines of the file. ``Rini``, ``length``, 
    ``width`` and ``linewidth`` are converted to float.

    Raises
    ------
    KeyError : if one of the expected header entries is not present.
    ValueError : if a numeric header entry cannot be converted to float.
    '''
    ### import packages
    import pandas as pd
    import os # for file paths
    
    ### Output
    # splitext instead of [:-4]: also correct for extensions that are not
    # exactly three characters long
    outfolder = os.path.splitext(filename)[0]
    outpathfull = os.path.join(outpath, outfolder)
    if not no_outpath:
        os.makedirs(outpathfull, exist_ok = True)
    
    ### Read data
    fullpath = os.path.join(filepath, filename)
    # python engine because the separator is a regular expression
    straining_df = pd.read_csv(fullpath, sep = r"\s+", skiprows = headerline,
                               header = None, index_col = False, 
                               engine = 'python')
    straining_df.columns = ["Time", "Position", "Force", "Direction", 
                            "Cycles_done", "Cycle_line", "Switch",
                            "R-Time", "Resist_A"]
    
    ### Extract file details from txt file
    search_dic = {'Initial Value=': 'Rini',
                  'Sample ID=':'sampleID',
                  'Substrate.Material=':'mat',
                  'Test length=':'length',
                  'Sample.Width=':'width',
                  'Printed line.Width=':'linewidth',
                  }
    
    value_dic = {}
    
    # Single pass over the file. The previous 'while linenr < 100' wrapper
    # never terminated for files shorter than 100 lines and had no effect for
    # longer ones (the inner loop already consumed the whole file). As before,
    # the last occurrence of a key wins.
    with open(fullpath, 'rt') as headerfile:
        for line in headerfile:
            for key, name in search_dic.items():
                idx = line.find(key)
                if idx != -1:
                    # Parameter value: everything behind the key, without the
                    # line break (if there is one)
                    value_dic[name] = line[idx + len(key):].rstrip('\n')
    
    missing = [name for name in search_dic.values() if name not in value_dic]
    if missing:
        raise KeyError('Header entries not found in ' + fullpath + ': ' 
                       + ', '.join(missing))
    
    Rini = float(value_dic['Rini'])
    sampleID = value_dic['sampleID']
    mat = value_dic['mat']
    length = float(value_dic['length'])
    width = float(value_dic['width'])
    linewidth = float(value_dic['linewidth'])
    return straining_df, Rini, sampleID, mat, length, width, linewidth, outpathfull
    
    
    
def _block_statistics(named_series, block_size, n_blocks):
    ''' Mean and standard deviation of each series over ``n_blocks`` 
    consecutive blocks of ``block_size`` values. Returns a DataFrame with one 
    row per block, first the mean columns (named like the dict keys) and then 
    the ``<key>_sd`` columns. Blocks without data give nan.'''
    import numpy as np
    import pandas as pd
    
    table = {}
    for name, series in named_series.items():
        values = series.to_numpy()
        means = []
        sds = []
        for block in range(n_blocks):
            chunk = values[block*block_size:(block + 1)*block_size]
            # np.mean/np.std of an empty slice also give nan, but with a
            # RuntimeWarning; produce the nan directly instead.
            means.append(np.mean(chunk) if chunk.size else np.nan)
            sds.append(np.std(chunk) if chunk.size else np.nan)
        table[name] = means
        table[name + '_sd'] = sds
    column_order = list(named_series) + [name + '_sd' for name in named_series]
    return pd.DataFrame(table, columns = column_order)


def calc_cycles(straining_df, nr_cycles, length, max_strain, min_strain, Rini,
                remove_first_point):
    ''' Perform calculations on the cyclic data to extract the resistance
    at the minimum strain (defined as the last data point before
    the strain starts to increase) and at the maximum strain (defined as the 
    first data point of the upper plateau). Those data points
    are also averaged over 10 and 100 cycles. Also extracts the
    resistance at ``min_strain + 0.01`` and ``max_strain - 0.01`` on the way
    up (1% and 19% strain for a 0-20% cycle).

    Parameters
    ----------
    straining_df : DataFrame with at least the columns 'Position', 
        'Cycles_done', 'Force', 'Time' and 'Resist_A'. It is modified in place
        (columns are added). Rows are addressed by position, which equals the
        label for the default 0..n-1 index.
    nr_cycles : only rows with ``Cycles_done <= nr_cycles`` are evaluated.
    length : 'Position' is divided by this value to obtain 'Strain'.
    max_strain, min_strain : strain of the upper plateau / the valley; 
        compared against the strain rounded to two decimals.
    Rini : not used by this function; kept so existing callers keep working.
    remove_first_point : if True, the first entry of the max, min and both
        cutoff series is dropped before averaging (and in the returned max and
        min series).

    Returns
    -------
    (straining_df, straining100_df, straining10_df, strain_series_max, 
    strain_series_max2, strain_series_min, strain_series_min2, 
    strain_series_residual_strain). ``straining100_df`` always has 10 rows 
    (blocks of 100 values) and ``straining10_df`` 100 rows (blocks of 10 
    values); blocks without data contain nan.

    Notes
    -----
    Rows that have no successor (or not enough successors for the force 
    criterion) are simply not flagged; before, they raised an IndexError.
    A cycle start without a matching plateau/valley (e.g. an unfinished last
    cycle) still gets a cycle number and cycle time, but no uphill/downhill
    marking.
    '''   
    # Import packages
    import numpy as np
    
    n_rows = len(straining_df)
    straining_df['Strain'] = straining_df['Position']/length 
    
    ## Extract min and max resistance per cycle and one point just above the
    ## minimum and one point just below the maximum strain
    # Note: these points are close to the min/max strain within each cycle, but do
    # not necessarily represent the absolute min/max due to hysteresis effects.
    strain = np.round(straining_df['Strain'].to_numpy(dtype = float), 2)
    in_range = straining_df['Cycles_done'].to_numpy() <= nr_cycles
    # The cutoffs are compared with '==' against strains rounded to two 
    # decimals, so round them as well to get rid of float noise 
    # (e.g. 0.05 + 0.01 = 0.060000000000000005).
    cutoff_h = round(max_strain - 0.01, 2) # one data point just below the max strain (19%)
    cutoff_l = round(min_strain + 0.01, 2) # one data point just above the min strain (1%)
    
    min_pos = np.zeros(n_rows, dtype = bool) # Start of cycle (last point valley)
    max_pos = np.zeros(n_rows, dtype = bool) # start of top plateau 
    max_pos2 = np.zeros(n_rows, dtype = bool) # end of top plateau
    min_pos2 = np.zeros(n_rows, dtype = bool) # end of cycle (first point of valley)
    cutoff_h_pos = np.zeros(n_rows, dtype = bool)
    cutoff_l_pos = np.zeros(n_rows, dtype = bool)
    
    if n_rows > 1:
        cur = strain[:-1] # strain of row i
        nxt = strain[1:] # strain of row i+1
        ok = in_range[:-1]
        
        # Same case distinction as an if/elif chain per row:
        # on the plateau / in the valley / in between
        at_top = ok & (cur >= max_strain)
        at_bottom = ok & ~(cur >= max_strain) & (cur <= min_strain)
        between = ok & (cur < max_strain) & ~(cur <= min_strain)
        
        reaches_top = between & (nxt >= max_strain)
        remainder = between & ~reaches_top & (cur > min_strain)
        crosses_low = remainder & (cur == cutoff_l) & (nxt > cutoff_l)
        
        max_pos2[:-1] = at_top & (nxt < max_strain)
        min_pos[:-1] = at_bottom & (nxt > min_strain)
        max_pos[1:] = reaches_top # flag sits on the row that reaches the plateau
        cutoff_l_pos[:-1] = crosses_low
        min_pos2[1:] = remainder & ~crosses_low & (nxt <= min_strain)
        # Independent of the case distinction above
        cutoff_h_pos[:-1] = ok & (cur == cutoff_h) & (nxt > cutoff_h)
    
    # Whole-column assignment (no chained indexing), so this also works with
    # pandas Copy-on-Write
    straining_df['Min_pos'] = min_pos
    straining_df['Max_pos'] = max_pos
    straining_df['Max_pos2'] = max_pos2
    straining_df['Min_pos2'] = min_pos2
    straining_df['Cutoff_h_pos'] = cutoff_h_pos
    straining_df['Cutoff_l_pos'] = cutoff_l_pos
            
    # Check correctness by checking the number of positions captured
    print(int(min_pos.sum()))
    print(int(max_pos.sum()))
    print(int(max_pos2.sum()))
    print(int(min_pos2.sum()))
    print(int(cutoff_h_pos.sum()))
    print(int(cutoff_l_pos.sum()))
    
    # Save the relevant data points for calculations
    resistance = straining_df['Resist_A']
    strain_series_max = resistance[max_pos]
    strain_series_min = resistance[min_pos]
    strain_series_max2 = resistance[max_pos2] # not really needed
    strain_series_min2 = resistance[min_pos2] # not really needed
    strain_series_h = resistance[cutoff_h_pos]
    strain_series_l = resistance[cutoff_l_pos]
    
    # Extract first point in unloading cycle where the (rounded) force is no 
    # longer positive. It only counts if the 18 rows before it all have a 
    # positive force (to avoid catching small fluctuations) and the first of 
    # those 18 rows is within nr_cycles.
    run = 18
    force = np.round(straining_df['Force'].to_numpy(dtype = float), 2)
    min_force_pos = np.zeros(n_rows, dtype = bool)
    if n_rows > run:
        positive = force > 0
        csum = np.concatenate(([0], np.cumsum(positive)))
        # number of positive rows in [i, i+run) for every i with i+run < n_rows
        full_run = (csum[run:n_rows] - csum[:n_rows - run]) == run
        min_force_pos[run:] = (in_range[:n_rows - run] & full_run 
                               & (force[run:] <= 0))
    straining_df['Min_force_pos'] = min_force_pos
    strain_series_residual_strain = straining_df['Strain'][min_force_pos]
    
    # Number all cycles (each starting at Min_pos and ending one row before 
    # the next Min_pos; the last one runs to the end of the data).
    # Everything before the first inclination is labeled cycle 0.
    # Also mark uphill and downhill slopes
    time = straining_df['Time'].to_numpy(dtype = float)
    cycle_nr = np.zeros(n_rows, dtype = int)
    cycle_time = np.zeros(n_rows, dtype = float)
    uphill = np.zeros(n_rows, dtype = bool)
    downhill = np.zeros(n_rows, dtype = bool)
    
    cycle_start = np.flatnonzero(min_pos)
    plateau1 = np.flatnonzero(max_pos)
    plateau2 = np.flatnonzero(max_pos2)
    valley1 = np.flatnonzero(min_pos2) # cycle end
    cycle_stop = np.append(cycle_start[1:], n_rows)
    for k, (start, stop) in enumerate(zip(cycle_start, cycle_stop)):
        # The k-th start is paired with the k-th plateau/valley
        if k < len(plateau1):
            uphill[start:plateau1[k]] = True
        if k < len(plateau2) and k < len(valley1):
            downhill[plateau2[k]:valley1[k]] = True
        cycle_nr[start:stop] = k + 1 # first real cycle is number 1
        cycle_time[start:stop] = time[start:stop] - time[start]
    
    straining_df['Cycle_nr'] = cycle_nr
    straining_df['Cycle_time'] = cycle_time
    straining_df['Uphill'] = uphill
    straining_df['Downhill'] = downhill
    
    # Optionally remove the first data point, because it would skew the average over the first 
    # 100/10 cycles massively due to the initial resistance being present
    if remove_first_point:
        strain_series_max = strain_series_max.iloc[1:]
        strain_series_min = strain_series_min.iloc[1:]
        strain_series_h = strain_series_h.iloc[1:]
        strain_series_l = strain_series_l.iloc[1:]
    
    to_average = {'max': strain_series_max, 
                  'min': strain_series_min,
                  'cutoff_h': strain_series_h, 
                  'cutoff_l': strain_series_l}
    # average over 100 cycles (10 blocks) and over 10 cycles (100 blocks)
    straining100_df = _block_statistics(to_average, 100, 10)
    straining10_df = _block_statistics(to_average, 10, 100)
    
    return (straining_df, straining100_df, straining10_df, strain_series_max, 
            strain_series_max2, strain_series_min, strain_series_min2,
              strain_series_residual_strain)